#############################
# packages

pacman::p_load(sjmisc, plotrix, matrixStats, expss)
#############################

##### exclude people who were not present in Wave 1, but were in Wave 3

# Keep only participants who have at least one row in wave "W1T1".
# `drop` is a per-person flag (1 = no "W1T1" row, 0 = has one); it remains in
# the data as a column after the filter.
fgz_long <- fgz_long %>%
  group_by(pid) %>%
  mutate(drop = as.numeric(!any(wave == "W1T1"))) %>%
  ungroup() %>%
  filter(drop == 0)

# count how many participants dropped because they did not participate in W1
# set <- fgz_long %>%  group_by(pid) %>%  slice(1)
# count(set$drop)


# drop if age <18


##### age
# Age in completed years at the time of the interview.
# The interview date is split arithmetically into year and month
# (assumes `interviewdatum` is a YYYYMMDD number -- TODO confirm).
# Only months are compared, so the day of the month is ignored: one year is
# subtracted only when the birth month is later than the interview month.
fgz_long <- fgz_long %>%
  mutate(
    intyr = trunc(interviewdatum / 10000),
    intmth = trunc(interviewdatum / 100 - intyr * 100),
    agea = ifelse(
      pbirthmnth > intmth,
      intyr - pbirthyear - 1,
      intyr - pbirthyear
    )
  ) %>%
  # keep adults; rows with unknown age are retained as well
  filter(agea > 17 | is.na(agea))


# variable generation

# ess-isced 6 categories
# Education level (1-6) derived from the codes in `pedu01` and `pedu02`.
# The first matching rule wins, so the rules run from the highest level down.
# Rows matching no rule, and rows with pedu01 in {6, 7} that matched nothing
# above, end up as NA.
fgz_long <- fgz_long %>%
  mutate(eisced6 = case_when(
    pedu02 %in% c(10, 11) ~ 6, # Diplom, Master, PhD
    pedu02 == 9 ~ 5,
    pedu02 %in% c(7, 8) ~ 4,
    pedu01 %in% c(4, 5) ~ 3,
    pedu01 %in% c(2, 3) | pedu02 %in% c(1, 2, 5, 6) ~ 2,
    pedu01 %in% c(1, 8) | pedu02 %in% c(3, 4, 13, 14, 15) ~ 1,
    pedu01 %in% c(6, 7) ~ NA
  ))

table(fgz_long$eisced6)

# Turn the numeric codes into a labelled factor (levels 1 to 6, in order).
fgz_long <- fgz_long %>%
  mutate(eisced6 = factor(
    eisced6,
    levels = 1:6,
    labels = c(
      "Less than lower secondary education",
      "Lower secondary education completed",
      "Upper secondary education completed",
      "Post-secondary non-tertiary education completed",
      "B.A.",
      "M.A. or PhD"
    )
  ))
 


# income ladder

table(fgz_long$pladder01)

# age
# Age groups built from `agea`. The thresholds are checked from the top down,
# so each rule only needs its lower bound. Ages below 18 and missing ages
# match no rule and stay NA.
fgz_long <- fgz_long %>%
  mutate(age = case_when(
    agea >= 61 ~ 5, # why not start at retirement age?
    agea >= 51 ~ 4,
    agea >= 41 ~ 3,
    agea >= 31 ~ 2,
    agea >= 18 ~ 1
  ))
# attach value labels to the age-group codes
val_lab(fgz_long$age) <- num_lab("1 18-30
                              2 31-40
                              3 41-50
                              4 51-60
                              5 61+ ")

##### gender
# Binary gender indicators derived from `pgender` (1 = male, 2 = female).
# Any other value, including "diverse", becomes NA because of the low case
# number. `male` and `female` are the two complementary 0/1 dummies.
fgz_long <- fgz_long %>%
  mutate(
    gender = case_when(
      pgender == 2 ~ 1, # female
      pgender == 1 ~ 0  # male
    ),
    male = 1 - gender,
    female = gender
  )

# value labels go on `gender` only
val_lab(fgz_long$gender) <- num_lab("0 male 
                              1 female")
table(fgz_long$gender)

table(fgz_long$pgender)

# family status
table(fgz_long$pfamstat)

# Collapse the `pfamstat` codes into five groups. The rules are listed by
# target code and are mutually exclusive; values outside 1-7 become NA.
fgz_long <- fgz_long %>%
  mutate(famstat = case_when(
    pfamstat == 1 ~ 1,
    pfamstat %in% c(4, 5) ~ 2,
    pfamstat == 3 ~ 3,
    pfamstat %in% c(6, 7) ~ 4,
    pfamstat == 2 ~ 5
  ))

table(fgz_long$famstat)

# Convert the group codes into a labelled factor.
fgz_long <- fgz_long %>%
  mutate(famstat = factor(
    famstat,
    levels = 1:5,
    labels = c(
      "Married",
      "Divorced or separated",
      "Single",
      "Widowed",
      "Same sex partnership"
    )
  ))

# Eastern Germany
# `east` is `ostwest` shifted down by one
# (presumably ostwest is coded 1/2, giving a 0/1 dummy -- confirm).
fgz_long <- mutate(fgz_long, east = ostwest - 1)

# compare the new variable against its source
table(fgz_long$east)
table(fgz_long$ostwest)

# Employment groups

# Public Sector Employment

table(fgz_long$pempcond01)

# 0/1 indicator from `pempcond01`: code 1 -> 1, code 2 -> 0.
# Every other value (including codes below 1 and missing) stays NA.
fgz_long <- fgz_long %>%
  mutate(pub_empl = case_when(
    pempcond01 == 1 ~ 1,
    pempcond01 == 2 ~ 0
  ))

table(fgz_long$pub_empl)

# cross-check against occupational status, showing missing values too
table(fgz_long$pub_empl, fgz_long$poccstat, useNA = "ifany")

table(fgz_long$pempcond01)



# Civil Servant

table(fgz_long$poccstat)

# 0/1 indicator from `poccstat`: code 2 -> 1, codes 1, 3, 4 and 5 -> 0.
# Any other value (below 1, above 5, or missing) stays NA.
fgz_long <- fgz_long %>%
  mutate(civ_serv = case_when(
    poccstat == 2 ~ 1,
    poccstat %in% c(1, 3, 4, 5) ~ 0
  ))
table(fgz_long$civ_serv)

table(fgz_long$poccstat)

# Check changes or inconsistencies

table(fgz_long$civ_serv, fgz_long$pub_empl) # 21 cases inconsistent


# Create employment sector

table(fgz_long$pub_empl)
table(fgz_long$civ_serv)

# `pub_type` combines `pub_empl` and `civ_serv`. The first matching rule wins:
#   2  = pub_empl is 1 and civ_serv is 1
#   NA = pub_empl is 1 but civ_serv is missing (unclear case)
#   1  = remaining rows with pub_empl equal to 1
#   0  = pub_empl is 0, whatever civ_serv says
#   NA = everything else (pub_empl missing)
# The missing-value branches use NA_real_ so that every branch has the same
# (double) type as the numeric codes; mixing in NA_integer_ only works when
# case_when() is allowed to cast branches to a common type.
fgz_long <- fgz_long %>%
  mutate(pub_type = case_when(
    pub_empl == 1 & civ_serv == 1 ~ 2,           # Civil servants
    pub_empl == 1 & is.na(civ_serv) ~ NA_real_,  # Unclear cases as NA
    pub_empl == 1 ~ 1,                           # Public employees
    pub_empl == 0 ~ 0,                           # Private employees
    TRUE ~ NA_real_                              # Other cases as NA
  ))


table(fgz_long$pub_type)
table(fgz_long$pub_type, useNA = "ifany")


# Convert the codes into a labelled factor.
fgz_long <- fgz_long %>%
  mutate(pub_type = factor(pub_type,
                           levels = c(0, 1, 2),
                           labels = c("Private empl", "Public empl", "Civil serv")))

table(fgz_long$pub_type)

# left-right scale
# `lrscale` is `ppolatt` shifted up by one. `lrscale_cat` bins it into five
# bands: <= 2, above 2 and below 5, exactly 5, above 5 up to 7, above 7.
# The rules are checked from the top band down; missing values stay NA.
# NOTE(review): if ppolatt runs 0-10, lrscale runs 1-11 and its midpoint is 6,
# not 5 -- confirm that these cut points are intended.
fgz_long <- fgz_long %>%
  mutate(
    lrscale = ppolatt + 1,
    lrscale_cat = case_when(
      lrscale > 7 ~ 5,
      lrscale > 5 ~ 4,
      lrscale == 5 ~ 3,
      lrscale > 2 ~ 2,
      lrscale <= 2 ~ 1
    )
  )

table(fgz_long$lrscale)


# populism indices
# Unweighted means of the items ppopul01-ppopul09: one overall index over all
# nine items and three sub-indices over three items each. A missing value in
# any contributing item makes the corresponding index NA.
fgz_long <- mutate(
  fgz_long,
  populism = (ppopul01 + ppopul02 + ppopul03 + ppopul04 + ppopul05 + ppopul06 +
                ppopul07 + ppopul08 + ppopul09) / 9,
  anti.elitism = (ppopul01 + ppopul02 + ppopul03) / 3,
  popular.sovereignty = (ppopul04 + ppopul05 + ppopul06) / 3,
  homogeneity = (ppopul07 + ppopul08 + ppopul09) / 3
)

# frequency tables of the resulting index values
table(fgz_long$populism)
table(fgz_long$anti.elitism)
table(fgz_long$popular.sovereignty)
table(fgz_long$homogeneity)

# region

table(fgz_long$region, fgz_long$wave)

table(fgz_long$ostwest, fgz_long$wave)

# `eastDE` takes its value (1 or 2) from `region` when region is 1 or 2 and
# otherwise falls back to `ostwest`. Rows where neither variable is 1 or 2
# (including both missing) match no rule and stay NA.
fgz_long <- fgz_long %>%
  mutate(eastDE = case_when(
    region == 1 ~ 1,
    region == 2 ~ 2,
    ostwest == 1 ~ 1,
    ostwest == 2 ~ 2
  ))

table(fgz_long$eastDE, fgz_long$wave)

